Dream | coef = age.
expression_dir = "~/ad-omics_hydra/microglia_omics/expression_tables/added_pilot_314s/expr_4brain_regions/"
#work_plots = "~/pd-omics/katia/scripts/GitHub_scripts/glia_omics/3rd_pass_mic_255s/age_related/age_Dream_3rd/"
work_plots = "~/ad-omics/ricardo/tmp/age_interaction/"
metadata_path = "~/pd-omics/katia/Microglia/mic_255s/metadata_files/"load(paste0(expression_dir, "Expression_filt_255s.Rdata"))
# dim(genes_counts_exp_3rd) # 19376 255
load(paste0(metadata_path, "metadata_255filt_eng_29jun2020.Rdata"))
# str(metadata3rd_pass)metadata <- metadata3rd_pass
ageByDonor = unique(metadata[,c("donor_id", "age", "sex")])
ggplot(ageByDonor, aes(x=age, fill=age)) +
geom_histogram(bins = 25, colour='black', position = "stack", fill="#5c88da99") +
labs(x="Age", y="Donors") +
scale_y_continuous(breaks = seq(0,20,2.5)) +
scale_x_continuous(breaks=seq(20,120,10)) +
theme_classic()To use cause_of_death in the model, I got the NAs and change it for “Other” category.
To use C1-C4 I calculated the median for the missing samples.
params = BiocParallel::MulticoreParam(workers=10, progressbar=T)
register(params)
registerDoParallel(10)
metadata$cause_of_death_categories[metadata$cause_of_death_categories %in% NA] <- "Other"
#table(metadata$cause_of_death_categories)
metadata$C1 = metadata$C1 %>% replace_na(median(metadata$C1, na.rm = T))
metadata$C2 = metadata$C2 %>% replace_na(median(metadata$C2, na.rm = T))
metadata$C3 = metadata$C3 %>% replace_na(median(metadata$C3, na.rm = T))
metadata$C4 = metadata$C4 %>% replace_na(median(metadata$C4, na.rm = T))
# Matching the names from the DE analysis to use the same code
genes_counts4deg = genes_counts_exp_3rd
metadata4deg = metadata
# all(colnames(genes_counts_exp_3rd) == metadata$donor_tissue) # Check the order of columns - TRUE
# The dream model operates directly on the results of voom.
# The only change compared to the standard limma workflow is to replace lmFit with dream.
# Check variance partition version
# packageVersion("variancePartition") # Must be 1.17.7
# The variable to be tested should be a fixed effect
form <- ~ sex + (1|donor_id) + (1|cause_of_death_categories) + C1 + C2 + C3 + C4 + picard_pct_mrna_bases + picard_summed_median + picard_pct_ribosomal_bases + age * tissue
# estimate weights using linear mixed model of dream
vobjDream = suppressWarnings( voomWithDreamWeights( genes_counts4deg, form, metadata4deg ) ) # supressing messages because of Biocparallel was generating a lot of messages
# Fit the dream model on each gene
# By default, uses the Satterthwaite approximation for the hypothesis test
metadata4deg$tissue = relevel(metadata4deg$tissue, ref = "MFG")
fitmm_MFGref = suppressWarnings (dream( vobjDream, form, metadata4deg ))
metadata4deg$tissue = relevel(metadata4deg$tissue, ref = "STG")
fitmm_STGref = suppressWarnings (dream( vobjDream, form, metadata4deg ))
metadata4deg$tissue = relevel(metadata4deg$tissue, ref = "SVZ")
fitmm_SVZref = suppressWarnings (dream( vobjDream, form, metadata4deg ))
metadata4deg$tissue = relevel(metadata4deg$tissue, ref = "THA")
fitmm_THAref = suppressWarnings (dream( vobjDream, form, metadata4deg ))
save(vobjDream, fitmm_MFGref,fitmm_STGref,fitmm_SVZref,fitmm_THAref, metadata4deg, genes_counts4deg, file = paste0(work_plots,"/age_interaction_dreamObj.RData"))load(paste0(work_plots,"/age_interaction_dreamObj.RData"))
MFG_0.01 = colSums(as.data.frame(unclass(summary(decideTests(fitmm_MFGref, adjust.method = "BH", p.value = 0.01))))[c(1,3),])
MFG_0.05 = colSums(as.data.frame(unclass(summary(decideTests(fitmm_MFGref, adjust.method = "BH", p.value = 0.05))))[c(1,3),])
MFG_0.10 = colSums(as.data.frame(unclass(summary(decideTests(fitmm_MFGref, adjust.method = "BH", p.value = 0.10))))[c(1,3),])
STG_0.01 = colSums(as.data.frame(unclass(summary(decideTests(fitmm_STGref, adjust.method = "BH", p.value = 0.01))))[c(1,3),])
STG_0.05 = colSums(as.data.frame(unclass(summary(decideTests(fitmm_STGref, adjust.method = "BH", p.value = 0.05))))[c(1,3),])
STG_0.10 = colSums(as.data.frame(unclass(summary(decideTests(fitmm_STGref, adjust.method = "BH", p.value = 0.10))))[c(1,3),])
SVZ_0.01 = colSums(as.data.frame(unclass(summary(decideTests(fitmm_SVZref, adjust.method = "BH", p.value = 0.01))))[c(1,3),])
SVZ_0.05 = colSums(as.data.frame(unclass(summary(decideTests(fitmm_SVZref, adjust.method = "BH", p.value = 0.05))))[c(1,3),])
SVZ_0.10 = colSums(as.data.frame(unclass(summary(decideTests(fitmm_SVZref, adjust.method = "BH", p.value = 0.10))))[c(1,3),])
THA_0.01 = colSums(as.data.frame(unclass(summary(decideTests(fitmm_THAref, adjust.method = "BH", p.value = 0.01))))[c(1,3),])
THA_0.05 = colSums(as.data.frame(unclass(summary(decideTests(fitmm_THAref, adjust.method = "BH", p.value = 0.05))))[c(1,3),])
THA_0.10 = colSums(as.data.frame(unclass(summary(decideTests(fitmm_THAref, adjust.method = "BH", p.value = 0.10))))[c(1,3),])
df_res_0.01 = data.frame(MFG = c(NA,MFG_0.01["age:tissueSTG"],MFG_0.01["age:tissueSVZ"],MFG_0.01["age:tissueTHA"]),
STG = c(STG_0.01["age:tissueMFG"],NA,STG_0.01["age:tissueSVZ"],STG_0.01["age:tissueTHA"]),
SVZ = c(SVZ_0.01["age:tissueMFG"],SVZ_0.01["age:tissueSTG"],NA,SVZ_0.01["age:tissueTHA"]),
THA = c(THA_0.01["age:tissueMFG"],THA_0.01["age:tissueSTG"],THA_0.01["age:tissueSVZ"],NA))
rownames(df_res_0.01) = c("MFG","STG","SVZ","THA")
ComplexHeatmap::Heatmap(as.matrix(df_res_0.01),
name = "DE FDR<1%",
cell_fun = function(j, i, x, y, width, height, fill) {
grid.text(sprintf("%d", df_res_0.01[i, j]), x, y, gp = gpar(fontsize = 10))},
cluster_columns = F,
cluster_rows = F,
col = colorRamp2(c(min(df_res_0.01,na.rm = T),max(df_res_0.01,na.rm = T)), c("white", "#CB454A")), rect_gp = gpar(col = "white", lwd = 2) )df_res_0.05 = data.frame(MFG = c(NA,MFG_0.05["age:tissueSTG"],MFG_0.05["age:tissueSVZ"],MFG_0.05["age:tissueTHA"]),
STG = c(STG_0.05["age:tissueMFG"],NA,STG_0.05["age:tissueSVZ"],STG_0.05["age:tissueTHA"]),
SVZ = c(SVZ_0.05["age:tissueMFG"],SVZ_0.05["age:tissueSTG"],NA,SVZ_0.05["age:tissueTHA"]),
THA = c(THA_0.05["age:tissueMFG"],THA_0.05["age:tissueSTG"],THA_0.05["age:tissueSVZ"],NA))
rownames(df_res_0.05) = c("MFG","STG","SVZ","THA")
ComplexHeatmap::Heatmap(as.matrix(df_res_0.05),
name = "DE FDR<5%",
cell_fun = function(j, i, x, y, width, height, fill) {
grid.text(sprintf("%d", df_res_0.05[i, j]), x, y, gp = gpar(fontsize = 10))},
cluster_columns = F,
cluster_rows = F,
col = colorRamp2(c(min(df_res_0.05,na.rm = T),max(df_res_0.05,na.rm = T)), c("white", "#CB454A")), rect_gp = gpar(col = "white", lwd = 2) )df_res_0.10 = data.frame(MFG = c(NA,MFG_0.10["age:tissueSTG"],MFG_0.10["age:tissueSVZ"],MFG_0.10["age:tissueTHA"]),
STG = c(STG_0.10["age:tissueMFG"],NA,STG_0.10["age:tissueSVZ"],STG_0.10["age:tissueTHA"]),
SVZ = c(SVZ_0.10["age:tissueMFG"],SVZ_0.10["age:tissueSTG"],NA,SVZ_0.10["age:tissueTHA"]),
THA = c(THA_0.10["age:tissueMFG"],THA_0.10["age:tissueSTG"],THA_0.10["age:tissueSVZ"],NA))
rownames(df_res_0.10) = c("MFG","STG","SVZ","THA")
ComplexHeatmap::Heatmap(as.matrix(df_res_0.10),
name = "DE FDR<10%",
cell_fun = function(j, i, x, y, width, height, fill) {
grid.text(sprintf("%d", df_res_0.10[i, j]), x, y, gp = gpar(fontsize = 10))},
cluster_columns = F,
cluster_rows = F,
col = colorRamp2(c(min(df_res_0.10,na.rm = T),max(df_res_0.10,na.rm = T)), c("white", "#CB454A")), rect_gp = gpar(col = "white", lwd = 2) )res_age_MFG_SVZ <- data.frame(topTable(fitmm_MFGref, coef='age:tissueSVZ', number=nrow(genes_counts4deg)), check.names = F) %>% rownames_to_column("ensembl") %>% mutate(comparison = "MFG_SVZ")
res_age_MFG_THA <- data.frame(topTable(fitmm_MFGref, coef='age:tissueTHA', number=nrow(genes_counts4deg)), check.names = F) %>% rownames_to_column("ensembl") %>% mutate(comparison = "MFG_THA")
res_age_MFG_STG <- data.frame(topTable(fitmm_MFGref, coef='age:tissueSTG', number=nrow(genes_counts4deg)), check.names = F) %>% rownames_to_column("ensembl") %>% mutate(comparison = "MFG_STG")
res_age_STG_SVZ <- data.frame(topTable(fitmm_STGref, coef='age:tissueSVZ', number=nrow(genes_counts4deg)), check.names = F) %>% rownames_to_column("ensembl") %>% mutate(comparison = "STG_SVZ")
res_age_STG_THA <- data.frame(topTable(fitmm_STGref, coef='age:tissueTHA', number=nrow(genes_counts4deg)), check.names = F) %>% rownames_to_column("ensembl") %>% mutate(comparison = "STG_THA")
res_age_SVZ_THA <- data.frame(topTable(fitmm_SVZref, coef='age:tissueTHA', number=nrow(genes_counts4deg)), check.names = F) %>% rownames_to_column("ensembl") %>% mutate(comparison = "SVZ_THA")
res_age = rbind(res_age_MFG_SVZ,res_age_MFG_THA,res_age_MFG_STG,res_age_STG_SVZ,res_age_STG_THA,res_age_SVZ_THA)
## Get conversion table for Gencode 30
gencode_30 = read.table("~/pd-omics/katia/ens.geneid.gencode.v30", header = T)
colnames(gencode_30) = c("ensembl","symbol")
res_name = merge(res_age, gencode_30, by="ensembl")
res_name = res_name[order(res_name$adj.P.Val), ]
write.table(x = res_name, file = paste0(work_plots,"all_comparisons.txt"), quote = F, sep = "\t", row.names = F, col.names = T)
createDT(res_name %>% filter(adj.P.Val<0.05))Expression in log10(TPM)
metadata4deg$tissue = factor(metadata4deg$tissue, levels = c("MFG","STG","SVZ","THA"))
# After running the limma you can extract the results using the topTable function.
# Here I pick the gene name from the top DE gene
top5Gene = rownames(topTable(fitmm_MFGref, coef = "age:tissueSVZ"))[1:5]
# Now get the expression of the gene after running regressing covariates
geneExp = t(log10(genes_tpm_exp_3rd[top5Gene,]+1))
rownames(gencode_30) = gencode_30$ensembl
colnames(geneExp) = gencode_30[colnames(geneExp),"symbol"]
geneExp = as.data.frame(geneExp)
# Add the expression as another column in the metadata table (samples in both tables should be in the same order!)
meta_tmp = cbind(metadata4deg, geneExp)
meta_tmp_m = melt(meta_tmp, measure.vars = c(colnames(geneExp)))
lm_eqn <- function(df, y, x){
formula = as.formula(sprintf('%s ~ %s', y, x))
m <- lm(formula, data=df);
# formating the values into a summary string to print out
# ~ give some space, but equal size and comma need to be quoted
eq <- substitute(italic(target) == a + b %.% italic(input)*","~~italic(r)^2~"="~r2*","~~p~"="~italic(pvalue),
list(target = y,
input = x,
a = format(as.vector(coef(m)[1]), digits = 2),
b = format(as.vector(coef(m)[2]), digits = 2),
r2 = format(summary(m)$r.squared, digits = 3),
# getting the pvalue is painful
pvalue = format(summary(m)$coefficients[2,'Pr(>|t|)'], digits=1)
)
)
as.character(as.expression(eq));
}
# Plot the expression of the gene.
library(ggpmisc)
ggplot(meta_tmp_m, aes(x = age, y = value, color = tissue)) +
geom_point() +
scale_color_futurama() +
easy_labs(x = "Age", y = expression(paste(log[10],"(TPM+1)"))) +
geom_smooth(method='lm', alpha = .3, se = F) +
stat_poly_eq(aes(label = paste(..rr.label..)),
rr.digits = 2,
label.x = 0.1, label.y = .98,
formula = y ~ x, parse = TRUE, size = 4) +
facet_grid(variable ~ tissue, scales = "free") +
theme_bw() +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank()) +
easy_remove_legend()
ggsave(paste0(work_plots,"/top5_agerelated_TPM.pdf"), width = 10, height = 10)Same thing but with residuals (z-scored)
# Get the residuals after adjusting for covariates
residuals = removeBatchEffect(vobjDream$E, # data after running voom
batch = metadata4deg$tissue, # add one categorical covariate
batch2 = paste(metadata4deg$sex,"_",metadata4deg$cause_of_death_categories), # add a second categorical covariate ( if you want )
# here you can force the regression to NOT remove effects of some variable of interest
#design = model.matrix(~ + age + tissue, data = metadata4deg),
# add the column with continuous covariates
covariates = metadata4deg %>% dplyr::select(picard_pct_mrna_bases, picard_summed_median, picard_pct_ribosomal_bases, C1 , C2 , C3 , C4))
residuals_zscore = t(scale(t(residuals)))
# Now get the expression of the gene after running regressing covariates
geneExp = t(residuals_zscore[top5Gene,]+1)
rownames(gencode_30) = gencode_30$ensembl
colnames(geneExp) = gencode_30[colnames(geneExp),"symbol"]
geneExp = as.data.frame(geneExp)
# Add the expression as another column in the metadata table (samples in both tables should be in the same order!)
meta_tmp = cbind(metadata4deg, geneExp)
meta_tmp_m = melt(meta_tmp, measure.vars = c(colnames(geneExp)))
# Plot the expression of the gene.
library(ggpmisc)
ggplot(meta_tmp_m, aes(x = age, y = value, color = tissue)) +
geom_point() +
scale_color_futurama() +
easy_labs(x = "Age", y = expression(paste("Residual expression (Z-score)"))) +
geom_smooth(method='lm', alpha = .3, se = F) +
stat_poly_eq(aes(label = paste(..rr.label..)),
rr.digits = 2,
label.x = 0.1, label.y = .98,
formula = y ~ x, parse = TRUE, size = 4) +
facet_grid(variable ~ tissue, scales = "free") +
theme_bw() +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank()) +
easy_remove_legend()
ggsave(paste0(work_plots,"/top5_agerelated_residual.pdf"), width = 10, height = 10)